version 11.1
#delimit;
clear;
pause on;
set more off;
 quietly log;
  local logon = r(status);
  if "`logon'" == "on" {; log close; };
log using table3, text replace;

	
/************************************************************************/
/* 	Author:		Olga Chyzh and Fred Boehmke			*/
/*	Date:		May 22, 2015			   	  	*/
/*      File:		table3.do				  	*/
/*	Purpose:	Replicate HB&M to use as an example of 		*/
/*			centrality. 					*/
/*      Input File:	hb&m2008replication.dta				*/
/*      Output File:	table3.txt,					*/
/*			table9.txt					*/
/*	Requires:	relogit (Gary King), 				*/
/*			st0248 (Stata network package)			*/
/*			estout (Stata package)				*/
/************************************************************************/
	
clear;	
clear matrix;
clear mata;
set mem 500m;
set seed 140;

program drop _all;

tempfile data temp;

	
	/*****************************************************************/
	/* This program combines the results from the imputed data sets. */
	/*****************************************************************/

program define midisp;

	local names: colnames(Q1);

	matrix Q = (Q1 + Q2 + Q3 + Q4 + Q5+ Q6 +Q7+ Q8 +Q9+ Q10)/10;		/* MI param estimates */
	matrix W = (W1 + W2 + W3 + W4 + W5 + W6 +W7 + W8 + W9+ W10)/10;		/* MI covariances     */
	local  k = colsof(Q);

	forvalues i=1/10 {;

	  matrix QQ=Q`i'-Q;

	  if `i'==1 {;

		matrix B=(QQ)'*QQ;

		};

	  else matrix B = B + (QQ)'*QQ;


	};

	matrix B=B/(10-1);				/* Covariance adjustment */

	matrix V=W+(1+1/10)*B;				/* Final covariance   */

	ereturn post Q V;
	ereturn display, plus neq(1);
	
	

end;


	/***************************************************/
	/* This program creates triadic closure variable.  */
	/***************************************************/
	

program define triad, rclass;

		/*Need to give the variable name denoting id1 and stub denoting j*/

  syntax  varlist [if] [in], id1(varname) id2(varname); 

		/*Making id variables consecutive */

	tempfile ids data;


	marksample touse;
		
		quietly {;
		keep if `touse';
		keep *`varlist' `id1' `id2';
		};
		
		sort `id1' `id2';
		
		quietly {;
		reshape wide `varlist', i(`id1') j(`id2');
		reshape long `varlist', i(`id1') j(`id2'); 
		egen i = group(`id1');
		bysort i: generat j = _n;
		save `data', replace;
		keep `id1' `id2' i j;
		
		sort i j;
		sum;
		save `ids', replace;
		sum i;
		};
		
		local n=r(max);
		
		clear;
	quietly	{;
		set obs `n';
		gen i=_n;
		expand `n';
		bysort i: gen j=_n;
		
		merge 1:1 i j using `data';
		drop _merge `id1' `id2';  
		sum i j;
		};

	/*Saving a matrix in which missing values are denoted by -99*/
	quietly recode `varlist' (.=-99);

	/*Need to reshape wide to fit within Stata resptrictions.*/
	quietly reshape wide `varlist', i(i) j(j);
	mkmat `varlist'*, matrix(Miss);
	quietly reshape long `varlist', i(i) j(j);

	/*Now recode missing as 0 for the time being*/
	quietly recode `varlist' (-99=0);

	quietly sum i;
	local n=r(max);

	quietly reshape wide `varlist', i(i) j(j);

	mkmat `varlist'*, matrix(M);
	matrix M2=M*M;


	svmat M2, names(m2);


	forvalues k=1/`n' {;
	quietly replace m2`k'=1 if m2`k'>1 & m2`k'!=. ;


	*replace ND`i'=0 if ND`i'<0;
	};
	mkmat m2*, matrix(M2);


	/*This command would only count indirect links if there is not a direct link*/
	*matrix ND=M2-M;
	matrix ND=M2;

	svmat ND, names(ND);

	/*Now need to bring back the matrix, in which missings were denoted by -99*/
	svmat Miss, names(miss);

	quietly {;
		reshape long `varlist' ND miss, i(i) j(j);
		replace ND=0 if i==j;

		*replace ND=0 if miss==-99;
		replace `varlist'=. if miss==-99;

		drop miss*;
		reshape wide `varlist' ND , i(i) j(j);
		};

	order i `varlist'* ND*;
	forvalues k=1/`n' {;
		quietly replace ND`k'=0 if ND`k'<0;

	};

	  quietly {;
		drop m*;
		reshape long `varlist' ND, i(i) j(j);
		rename ND triad_`varlist';

		merge 1:1 i j using `ids';
		drop i j _merge;
		drop if `id1'==. | `id2'==.;
		drop if `id1'==`id2';
		};
			
end;


		/*Opening HB&M replication data*/

use hb&m2008replication.dta, clear;

		/*Get the naive model's sample*/

relogit sanctOn
PTASameGT0_lag PTA_traBilMax_lag PTA_rgdp1_lag PTA_rgdp2_lag /*PTA hyp*/
PTACent1_lag PTAClusSize1_lag PTAClusSame_lag/*SNA hyp*/
pol1_lag pol2_lag traBilMax_lag /*Lib hyp*/
rgdp1_lag rgdp2_lag allies_lag /*Rea hyp*/,
cluster(dyadid);
gen naive_sample=1 if e(sample);

		/* Generate Centrality score. */

preserve;

  clear;

  tempfile netsum;

  save `netsum', replace emptyok;

restore;

levelsof year, local(years);

foreach year of local years {;

  preserve;

  keep if PTASame>0 & !missing(PTASame) & year==`year';
  
  keep ct_idc1 ct_idc2 year dyadid PTASame;
  
  netsis ct_idc1 ct_idc2, measure(adjacency) weight(PTASame) name(A, replace);

  netsummarize A/(rows(A)-1), generate(PTA_centr_net) statistic(rowsum);
  
	append using `netsum';
  
	save `netsum', replace;
  
  restore;
  
  };
  
  merge 1:1 dyadid year using `netsum', keep(match master) generat(_merge_netsum);
  
  replace PTA_centr_net_source = 0 if missing(PTA_centr_net_source);
  
		/*The following commands generate lagged_degree_centrality*/

egen N=count(PTASame) if !missing(PTASame), by(ct_idc1 year);
egen centr=sum(PTASame) if !missing(PTASame), by(ct_idc1 year);

gen centr_st=centr/N;

tsset dyad year;
gen centr_lag=L.centr_st;

gen PTA_centr_net_lag=L.PTA_centr_net_source;

sum centr_lag PTACent1_lag PTA_centr_net_lag;
corr centr_lag PTACent1_lag PTA_centr_net_lag;
pwcorr centr_lag PTACent1_lag PTA_centr_net_lag;

/*Generate triadic closure variable*/

save `data', replace;

clear;
tempfile triads;
save `triads', replace emptyok;

use `data', clear;

forvalues year=1948/2000 {;

	preserve;
	quietly keep if year==`year';
	quietly drop year;	
	di `year';
    
	quietly keep ct_idc1 ct_idc2 PTASameGT0_lag ;
    
	triad PTASameGT0_lag, id1(ct_idc1) id2(ct_idc2);

	quietly {;

		generat year = `year';

		append using `triads';

		save `triads', replace;

		};
				
	restore;
	noisily _dots `year' 0;
	};

quietly {;
	use `data', clear;
	  merge 1:1 ct_idc1 ct_idc2 year using `triads';
	  drop _merge;
	  save `data', replace;
	  };
			  
sum PTASameGT0_lag  triad_PTASameGT0_lag;			  

		/*Run a model that predicts PTA*/

#delimit;
regress PTASame GATT_lag  log_GDPPC1_lag log_GDPPC2_lag allies_lag joint_dem log_GDPPC_diff log_dist log_tradeAB_lag 
	contig samelang triad_PTASameGT0_lag, cluster(dyadid);

gen sample=1 if e(sample);
  matrix B = e(b);
  matrix C = e(V);

estimates store PTA;

	/* Generate Table A9. */

#delimit;
estout PTA using table9.txt, style(tab) replace 
	cells("b(fmt(3) star) se(par)")
	modelwidth(8)
	starlevels(* 0.05 ** 0.01 *** 0.001) legend
	varwidth(30)
	order(GATT_lag triad_PTASameGT0_lag log_GDPPC1_lag log_GDPPC2_lag allies_lag joint_dem log_GDPPC_diff log_dist log_tradeAB_lag contig samelang)
	collabels(, none)
	varlabels(_cons "Constant"
		GATT_lag "Shared GATT membership"
		triad_PTASameGT0_lag "Triadic Closure"
		log_GDPPC1_lag "GDP/cap A"
		log_GDPPC2_lag "GDP/cap B"
		log_tradeAB_lag  "Total Trade AB"
		allies_lag  "Allies"
		log_GDPPC_diff "GDP A-GDP B"
		joint_dem "Joint Democracy"
		log_dist "Distance, logged"
		contig "Contiguity"
		samelang "Same Language");

		/* Generate multiple draws of estimated PTA. */
		
drawnorm b1 b2 b3 b4 b5 b6 b7 b8 b9 b10 b11 b0, mean(B) cov(C); 
   
forvalues i=1/10 {;

gen PTA_hat`i'= b0[`i'] + b1[`i']*GATT_lag + b2[`i']*log_GDPPC1_lag + b3[`i']*log_GDPPC2_lag + b4[`i']*allies_lag + b5[`i']*joint_dem 
	+ b6[`i']*log_GDPPC_diff + b7[`i']*log_dist + b8[`i']*log_tradeAB_lag + b9[`i']*contig+b10[`i']*samelang 
	+ b11[`i']*triad_PTASameGT0_lag;
	
		/*Predicted Centrality*/

egen N_hat`i'=count(PTA_hat`i') if !missing(PTA_hat`i'), by(ct_idc1 year);
egen centr_hat`i'=sum(PTA_hat`i') if !missing(PTA_hat`i'), by(ct_idc1 year);
replace centr_hat`i'=centr_hat`i'/N_hat`i';

tsset dyad year;
gen centr_hat_lag`i'=L.centr_hat`i';
drop N_hat`i';
	*save `data', replace;

  };


sum centr_lag centr_hat_lag* PTACent1_lag;
corr centr_lag  centr_hat* PTACent1_lag;
*save `data', replace;

  
		/* Generat the instrument of centrality Keleijian's way*/

		/*Generate polynomials of each variable*/

local varlist="log_GDPPC1_lag log_GDPPC2_lag log_GDPPC_diff log_dist log_tradeAB_lag "; 

foreach var of local varlist {;
	gen `var'_sq=`var'^2;
	gen `var'_cube=`var'^3;

	}; 

		/*Run a model that predicts PTA using polynomials. */

reg PTACent1_lag GATT_lag log_GDPPC1_lag log_GDPPC2_lag allies_lag joint_dem log_GDPPC_diff log_dist log_tradeAB_lag contig samelang
	log_GDPPC1_lag_sq log_GDPPC2_lag_sq   log_GDPPC_diff_sq log_dist_sq log_tradeAB_lag_sq  triad_PTASameGT0_lag, cluster(dyadid);

  matrix B = e(b);
  matrix C = e(V);

estimates store centr_instr;
	
		/* Generate multiple draws of estimated Centrality. */
			
drop b1-b0;
compress;
drawnorm c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 c0, mean(B) cov(C); 

  
forvalues i=1/10 {;

  gen centr_instr`i'= c0[`i'] + c1[`i']*GATT_lag + c2[`i']*log_GDPPC1_lag + c3[`i']*log_GDPPC2_lag + c4[`i']*allies_lag + c5[`i']*joint_dem 
	+ c6[`i']*log_GDPPC_diff + c7[`i']*log_dist + c8[`i']*log_tradeAB_lag + c9[`i']*contig+c10[`i']*samelang+
	c11[`i']*log_GDPPC1_lag_sq +c12[`i']*log_GDPPC2_lag_sq +c13[`i']*log_GDPPC_diff_sq+c14[`i']*log_dist_sq 
	+c15[`i']*log_tradeAB_lag_sq +c16[`i']*triad_PTASameGT0_lag;
	
  }; 

drop c1-c0;

corr PTACent1_lag centr_hat* centr_instr*;
 
		/* Estimate the Naive Model. */

relogit sanctOn
PTASameGT0_lag PTA_traBilMax_lag PTA_rgdp1_lag PTA_rgdp2_lag /*PTA hyp*/
PTACent1_lag PTAClusSize1_lag PTAClusSame_lag/*SNA hyp*/
pol1_lag pol2_lag traBilMax_lag /*Lib hyp*/
rgdp1_lag rgdp2_lag allies_lag /*Rea hyp*/,
cluster(dyadid);

estout, cells(b se(par));
estimates store naive;

		/* Estimate the naive Replication Model. */

relogit sanctOn
PTASameGT0_lag PTA_traBilMax_lag PTA_rgdp1_lag PTA_rgdp2_lag /*PTA hyp*/
centr_lag PTAClusSize1_lag PTAClusSame_lag/*SNA hyp*/
pol1_lag pol2_lag traBilMax_lag /*Lib hyp*/
rgdp1_lag rgdp2_lag allies_lag /*Rea hyp*/ if sample==1,
cluster(dyadid);

estout, cells(b se(par));
estimates store naive_repl;

		/*Corrected Model 1. */

nois _dots 0, title(Running model on each imputed data set) reps(10);

  forvalues i=1/10 {;
  
  quietly relogit sanctOn
  PTASameGT0_lag PTA_traBilMax_lag PTA_rgdp1_lag PTA_rgdp2_lag /*PTA hyp*/
  centr_hat`i' PTAClusSize1_lag PTAClusSame_lag/*SNA hyp*/
  pol1_lag pol2_lag traBilMax_lag /*Lib hyp*/
  rgdp1_lag rgdp2_lag allies_lag /*Rea hyp*/,
  cluster(dyadid);

  	matrix Q`i' = e(b);
  	matrix W`i' = e(V);
	local  ll`i' = e(ll);
	
	nois _dots `i' 0;
	
  	};

	midisp;
	
/* -estout- seems to be necessary to get it to -estimates store-. */
	
	estout, cells(b se(par));
	estimates store corrected_mi;	
	
	di "Average log-likelihood: " (`ll1' + `ll2' + `ll3' + `ll4' + `ll5'+`ll6'+`ll7'+`ll8'+`ll9'+`ll10')/10;

		/*Corrected Model 2 (Keleijian)*/

nois _dots 0, title(Running model on each imputed data set) reps(10);

  forvalues i=1/10 {;
  
  quietly relogit sanctOn
  PTASameGT0_lag PTA_traBilMax_lag PTA_rgdp1_lag PTA_rgdp2_lag /*PTA hyp*/
  centr_instr`i' PTAClusSize1_lag PTAClusSame_lag/*SNA hyp*/
  pol1_lag pol2_lag traBilMax_lag /*Lib hyp*/
  rgdp1_lag rgdp2_lag allies_lag /*Rea hyp*/,
  cluster(dyadid);

  	matrix Q`i' = e(b);
  	matrix W`i' = e(V);
	local  ll`i' = e(ll);
	
	nois _dots `i' 0;
	
  	};

	midisp;
	
		/* -estout- seems to be necessary to get it to -estimates store-. */
	
	estout, cells(b se(par));
	estimates store instr_mi;	
	
	di "Average log-likelihood: " (`ll1' + `ll2' + `ll3' + `ll4' + `ll5'+`ll6'+`ll7'+`ll8'+`ll9'+`ll10')/10;

		/* Create Table 3. */
	
	estout naive naive_repl corrected_mi instr_mi using table3.txt, style(tab) replace 
		cells(b(fmt(3) star) se(par))
		modelwidth(8)
		starlevels(* 0.05 ** 0.01 *** 0.001) legend
		varwidth(30)
		stat(N, fmt(%9.0f))
		order(PTACent1_lag centr_lag centr_hat10 centr_instr10 
		PTASameGT0_lag PTA_traBilMax_lag PTA_rgdp1_lag PTA_rgdp2_lag 
		PTAClusSize1_lag PTAClusSame_lag
		pol1_lag pol2_lag traBilMax_lag 
		rgdp1_lag rgdp2_lag allies_lag )
		collabels(, none)
		varlabels(centr_hat10	"Centrality (Estimated)"
			centr_lag	"Centrality (Replication)"
			centr_instr10	"Centrality (Instrument)"
			PTACent1_lag	"Centrality (Naive)"
			PTASameGT0_lag   "PTA"
			PTA_traBilMax_lag   "PTA*Trade (max(A,B))"
			PTA_rgdp1_lag  "PTA*GDP A"
			PTA_rgdp2_lag  "PTA*GDP B"
			PTAClusSize1_lag   "PTA*Cluster Size"
			PTAClusSame_lag  "PTA*Same Cluster"
			pol1_lag  "Polity A"
			pol2_lag  "Polity B"
			traBilMax_lag  "Trade (max(A,B))"
			rgdp1_lag  "GDP A"
			rgdp2_lag  "GDP B"
			allies_lag "Allies");

log close;
clear;
exit, STATA;

